* Path configuration used by every section below.
*   projectdir : root of the project tree (currently the literal "~").
*   datadir    : data folder under projectdir; $projectdir is expanded when
*                this line runs, so change projectdir above, not afterwards.
global projectdir "~"
global datadir "$projectdir/data"


* Start with the employee-grant mapping and merge on the CFDA shock years.
* Output: one file linking each pik to the CFDA shocks it is seen on, together
* with the earliest start year observed for that pik-CFDA pair.
* -clear- makes the script runnable even if a dataset is already in memory.
use "$datadir/raw_pulls/umetrics/FSRDC_2018/core_employee2018q4_pik.dta", clear

* Year component of period_start_date (assumes a Stata daily date - confirm).
gen start_year = year(period_start_date)

* Being observed on a CFDA code at all is enough; nothing fancier is attempted.
* Rows without an employee id or without a CFDA code cannot be linked.
drop if emp_number == "" | cfda == ""

* Bring in cfda_shocks.dta by CFDA code. With -update-, matched observations
* carry _merge values 3, 4 or 5, so ">= 3" keeps exactly the matched rows.
merge m:1 cfda using "$datadir/intermediate_files/cfda_shocks.dta", update
keep if _merge >= 3
drop _merge

keep cfda emp_number nshock year_shock start_year
duplicates drop

* Within each employee-CFDA pair keep only the row(s) with the earliest
* start_year. Sorting on start_year puts the minimum first (missing sorts
* last), so start_year[1] is the group minimum.
bysort emp_number cfda (start_year): keep if start_year == start_year[1]

* The job-history files joined later are keyed on pik.
rename emp_number pik

compress
* -replace- lets the script be re-run without deleting the old file by hand.
save "$datadir/intermediate_files/cfdashocks_pikstartyear_2018q4.dta", replace





*LEHD NAICS
* Join the LEHD job history to the pik-CFDA-shock file, restrict to jobs at or
* after the person's start year with firmage <= 0, derive the 4-digit NAICS
* code and flag the codes in the ht list, then save the distinct combinations.
cd "$datadir/intermediate_files"
use iris_lehd_jobhistory_2018q4_v6, clear

* unmatched(both) creates _merge; values >= 3 mark piks present in both files.
joinby pik using "$datadir/intermediate_files/cfdashocks_pikstartyear_2018q4.dta", update unmatched(both) _merge(_merge)
keep if _merge >= 3
drop _merge

keep if year >= start_year
keep if firmage <= 0

* 4-digit NAICS codes that switch the ht_* indicator on
* (presumably the "high-tech" industry list - confirm against the source).
local ht_naics4 ///
    1131 1132 2111 2211 3241 3251 3252 3253 3254 3255 3259 ///
    3332 3336 3339 3341 3342 3343 3344 3345 3346 3353 3364 3369 ///
    4324 4861 4862 4869 ///
    5112 5161 5171 5172 5173 5174 5179 5181 5182 5211 5232 ///
    5413 5415 5416 5417 5511 5612 8112

*local years 2007 2012 2017
local years 2012
foreach year of local years {

    * First four leading digits of the NAICS string; left empty when the
    * value does not start with four digits.
    * NOTE(review): naics4_* carries no year suffix, so looping over several
    * years needs the -drop- below re-enabled (or a year-specific name).
    gen naics4_seinunit_mode = regexs(1) if regexm(mode_naics`year'fnl_emp, "^([0-9][0-9][0-9][0-9])")
    *gen naics4_seinunit_job = regexs(1) if regexm(naics`year'fnl_imp, "^([0-9][0-9][0-9][0-9])")

    *local levels "seinunit_mode seinunit_job"
    local levels "seinunit_mode"
    foreach level of local levels {

        * Indicator starts at 0 (also for rows with no 4-digit code) and is
        * set to 1 for every code in the list.
        gen ht_`level'_`year' = 0
        foreach code of local ht_naics4 {
            replace ht_`level'_`year' = 1 if naics4_`level' == "`code'"
        }

    }

    *drop naics4_seinunit_mode naics4_seinunit_job

}

keep naics2012fnl cfda nshock year_shock naics4_seinunit_mode ht_seinunit_mode_2012

duplicates drop

compress
* -replace- lets the script be re-run without deleting the old file by hand.
save "$datadir/intermediate_files/naics_ht_start_lehd_cfdashocks_2018q4.dta", replace





*W2 NAICS
* Same steps as for the LEHD job history, applied to the W2 job history:
* join to the pik-CFDA-shock file, restrict to jobs at or after the person's
* start year with firmage <= 0, derive the 4-digit NAICS code, flag the codes
* in the ht list, and save the distinct combinations.
cd "$datadir/intermediate_files"
use iris_w2_jobhistory_2018q4_v4.dta, clear

* unmatched(both) creates _merge; values >= 3 mark piks present in both files.
joinby pik using "$datadir/intermediate_files/cfdashocks_pikstartyear_2018q4.dta", update unmatched(both) _merge(_merge)
keep if _merge >= 3
drop _merge

keep if year >= start_year
keep if firmage <= 0

* 4-digit NAICS codes that switch the ht_* indicator on
* (presumably the "high-tech" industry list - confirm against the source).
local ht_naics4 ///
    1131 1132 2111 2211 3241 3251 3252 3253 3254 3255 3259 ///
    3332 3336 3339 3341 3342 3343 3344 3345 3346 3353 3364 3369 ///
    4324 4861 4862 4869 ///
    5112 5161 5171 5172 5173 5174 5179 5181 5182 5211 5232 ///
    5413 5415 5416 5417 5511 5612 8112

local years 2012
foreach year of local years {

    * First four leading digits of the NAICS string; left empty when the
    * value does not start with four digits.
    * NOTE(review): naics4 carries no year suffix, so looping over several
    * years needs the -drop- below re-enabled (or a year-specific name).
    gen naics4 = regexs(1) if regexm(naics`year', "^([0-9][0-9][0-9][0-9])")

    * Indicator starts at 0 (also for rows with no 4-digit code) and is set
    * to 1 for every code in the list.
    gen ht_mode_`year' = 0
    foreach code of local ht_naics4 {
        replace ht_mode_`year' = 1 if naics4 == "`code'"
    }

    *drop naics4

}


keep naics2012 cfda nshock year_shock naics4 ht_mode_2012

duplicates drop

compress
* -replace- lets the script be re-run without deleting the old file by hand.
save "$datadir/intermediate_files/naics_ht_start_w2_cfdashocks_2018q4.dta", replace
